# load packages
library(readr)
library(dplyr)
library(ggplot2)
library(gridExtra)
library(corrplot)
library(plotly)
library(reshape2)
library(car)

Setting up Data

# import dataset
female <- read_csv("/Users/hollyduong/Desktop/DA 401/ChildMarriageInVietnam/Data/wm.csv")
# get a glimpse of dataset
head(female)
## # A tibble: 6 × 458
##     HH1   HH2    LN   WM1   WM2   WM3 WMINT   WM4   WM5  WM6D  WM6M  WM6Y   WM8
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     1     2     3     1     2     3    92    91    92    19    11  2020     2
## 2     1     4     2     1     4     2    92    91    92    18    11  2020     1
## 3     1     9     4     1     9     4    92    91    92    18    11  2020     1
## 4     1    10     4     1    10     4    92    91    92    18    11  2020     2
## 5     1    11     4     1    11     4    92    91    92    19    11  2020     2
## 6     1    11     5     1    11     5    92    91    92    18    11  2020     2
## # ℹ 445 more variables: WM9 <dbl>, WM17 <dbl>, WM7H <dbl>, WM7M <dbl>,
## #   WM10H <dbl>, WM10M <dbl>, WM11 <dbl>, WM12 <dbl>, WM13 <dbl>, WM14 <dbl>,
## #   WM15 <dbl>, WM22 <dbl>, WM23 <dbl>, WM24 <dbl>, WMHINT <dbl>, WMFIN <dbl>,
## #   WB3M <dbl>, WB3Y <dbl>, WB4 <dbl>, WB5 <dbl>, WB6A <dbl>, WB6B <dbl>,
## #   WB7 <dbl>, WB9 <lgl>, WB10A <lgl>, WB10B <lgl>, WB11 <lgl>, WB12A <lgl>,
## #   WB12B <lgl>, WB14 <dbl>, WB15 <dbl>, WB16 <dbl>, WB17 <dbl>, WB18 <dbl>,
## #   WB19A <chr>, WB19B <chr>, WB19C <chr>, WB19D <chr>, WB19E <chr>, …
# Select the specified columns to create a new dataframe
female_df <- select(female, WAGEM, HH6, HH7, MSTATUS, welevel, insurance, ethnicity, windex5, CP2, HA1, MT4, MT9, MT11)

# View the first few rows of the new dataframe
summary(female_df)
##      WAGEM            HH6             HH7           MSTATUS         welevel    
##  Min.   :10.00   Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :0.00  
##  1st Qu.:18.00   1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.00  
##  Median :20.00   Median :2.000   Median :3.000   Median :1.000   Median :2.00  
##  Mean   :20.92   Mean   :1.683   Mean   :3.404   Mean   :1.413   Mean   :2.46  
##  3rd Qu.:23.00   3rd Qu.:2.000   3rd Qu.:5.000   3rd Qu.:1.000   3rd Qu.:3.00  
##  Max.   :47.00   Max.   :2.000   Max.   :6.000   Max.   :9.000   Max.   :9.00  
##  NA's   :2026    NA's   :11      NA's   :11      NA's   :11      NA's   :524   
##    insurance       ethnicity        windex5           CP2       
##  Min.   :1.000   Min.   :1.000   Min.   :0.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :1.000   Median :1.000   Median :2.000   Median :1.000  
##  Mean   :1.135   Mean   :2.035   Mean   :2.494   Mean   :1.431  
##  3rd Qu.:1.000   3rd Qu.:3.000   3rd Qu.:4.000   3rd Qu.:2.000  
##  Max.   :9.000   Max.   :6.000   Max.   :5.000   Max.   :9.000  
##  NA's   :524                                     NA's   :864    
##       HA1             MT4             MT9             MT11     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.00  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.00  
##  Median :1.000   Median :2.000   Median :1.000   Median :1.00  
##  Mean   :1.215   Mean   :1.664   Mean   :1.365   Mean   :1.12  
##  3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:1.00  
##  Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :9.00  
##  NA's   :524     NA's   :524     NA's   :2496    NA's   :524
# Rename the columns
female_df <- female_df %>%
  rename(
  age_first_marriage = WAGEM,
  area = HH6,
  region = HH7,
  marital_status = MSTATUS,
  education_level = welevel,
  health_insurance = insurance,
  ethnicity = ethnicity,
  wealth_index = windex5,
#  contraceptive_decision_maker = CP5, #R
  current_contraceptive_use = CP2,
# age_first_sexual_intercourse = SB1, #R
#  currently_married = MA1,
#  partner_age = MA2, #R
  awareness_hiv_aids = HA1,
  used_computer_tablet = MT4,
  used_internet = MT9,
  owns_mobile_phone = MT11
)
# Summarize missing values by column
summarize_missing <- sapply(female_df, function(x) sum(is.na(x)))
print(summarize_missing)
##        age_first_marriage                      area                    region 
##                      2026                        11                        11 
##            marital_status           education_level          health_insurance 
##                        11                       524                       524 
##                 ethnicity              wealth_index current_contraceptive_use 
##                         0                         0                       864 
##        awareness_hiv_aids      used_computer_tablet             used_internet 
##                       524                       524                      2496 
##         owns_mobile_phone 
##                       524
# Recode the value 9 to NA for specified variables
female_df <- female_df %>%
  mutate(
    current_contraceptive_use = na_if(current_contraceptive_use, 9),
    used_internet = na_if(used_internet, 9),
    health_insurance = na_if(health_insurance, 9),
    education_level = na_if(education_level, 9),
    awareness_hiv_aids = na_if(awareness_hiv_aids, 9),
    used_computer_tablet = na_if(used_computer_tablet, 9),
    owns_mobile_phone = na_if(owns_mobile_phone, 9),
    marital_status = na_if(marital_status, 9)
  )

# Recode the value 6 to NA for 'ethnicity'
female_df$ethnicity <- na_if(female_df$ethnicity, 6)
# Recoding variables from 1 (Yes) and 2 (No) to 1 (Yes) and 0 (No)
female_df$health_insurance <- ifelse(female_df$health_insurance == 2, 0, female_df$health_insurance)
female_df$current_contraceptive_use <- ifelse(female_df$current_contraceptive_use == 2, 0, female_df$current_contraceptive_use)
female_df$awareness_hiv_aids <- ifelse(female_df$awareness_hiv_aids == 2, 0, female_df$awareness_hiv_aids)
female_df$used_computer_tablet <- ifelse(female_df$used_computer_tablet == 2, 0, female_df$used_computer_tablet)
female_df$owns_mobile_phone <- ifelse(female_df$owns_mobile_phone == 2, 0, female_df$owns_mobile_phone)
female_df$used_internet <- ifelse(female_df$used_internet == 2, 0, female_df$used_internet)
# View the changes to ensure NA substitution has been correctly applied
summary(female_df)
##  age_first_marriage      area           region      marital_status 
##  Min.   :10.00      Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:18.00      1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median :20.00      Median :2.000   Median :3.000   Median :1.000  
##  Mean   :20.92      Mean   :1.683   Mean   :3.404   Mean   :1.408  
##  3rd Qu.:23.00      3rd Qu.:2.000   3rd Qu.:5.000   3rd Qu.:1.000  
##  Max.   :47.00      Max.   :2.000   Max.   :6.000   Max.   :3.000  
##  NA's   :2026       NA's   :11      NA's   :11      NA's   :18     
##  education_level health_insurance   ethnicity      wealth_index  
##  Min.   :0.00    Min.   :0.0000   Min.   :1.000   Min.   :0.000  
##  1st Qu.:1.00    1st Qu.:1.0000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :2.00    Median :1.0000   Median :1.000   Median :2.000  
##  Mean   :2.46    Mean   :0.8659   Mean   :1.586   Mean   :2.494  
##  3rd Qu.:3.00    3rd Qu.:1.0000   3rd Qu.:2.000   3rd Qu.:4.000  
##  Max.   :5.00    Max.   :1.0000   Max.   :4.000   Max.   :5.000  
##  NA's   :525     NA's   :525      NA's   :1148                   
##  current_contraceptive_use awareness_hiv_aids used_computer_tablet
##  Min.   :0.0000            Min.   :0.0000     Min.   :0.0000      
##  1st Qu.:0.0000            1st Qu.:1.0000     1st Qu.:0.0000      
##  Median :1.0000            Median :1.0000     Median :0.0000      
##  Mean   :0.5842            Mean   :0.7975     Mean   :0.3412      
##  3rd Qu.:1.0000            3rd Qu.:1.0000     3rd Qu.:1.0000      
##  Max.   :1.0000            Max.   :1.0000     Max.   :1.0000      
##  NA's   :885               NA's   :541        NA's   :532         
##  used_internet    owns_mobile_phone
##  Min.   :0.0000   Min.   :0.0000   
##  1st Qu.:0.0000   1st Qu.:1.0000   
##  Median :1.0000   Median :1.0000   
##  Mean   :0.6394   Mean   :0.8831   
##  3rd Qu.:1.0000   3rd Qu.:1.0000   
##  Max.   :1.0000   Max.   :1.0000   
##  NA's   :2501     NA's   :529

Creating New Variable Access to Media

# Combine individual variables for access to the internet, phone, and computer into a single variable
# This new variable "access_to_media" will have a value of 1 if the respondent has access to any of these media sources, and 0 if not
# This provides a more comprehensive measure of media access
female_df <- female_df %>%
  mutate(access_to_media = ifelse(used_computer_tablet == 1 | used_internet == 1 | owns_mobile_phone == 1, 1, 0))

# In later analysis, use access_to_media instead of the 3 separate variables
# Exporting female_df to a CSV file in the current working directory
#write.csv(female_df, "female_df.csv", row.names = FALSE)

Distributions of Data

# Histogram for 'Age at First Marriage'
afm_hist <- ggplot(female_df, aes(x = age_first_marriage)) +
  geom_histogram(fill = "blue", color = "white") +
  theme_minimal() +
  ggtitle("Histogram of Age at First Marriage") +
  xlab("Age at First Marriage") +
  ylab("Frequency")

print(afm_hist)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2026 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Saving the histogram
#ggsave("histogram_age_first_marriage.png", plot = afm_hist, width = 8, height = 6, dpi = 300)
# Plotting a boxplot for 'Age at First Marriage'
ggplot(female_df, aes(y = age_first_marriage)) +
  geom_boxplot(fill = "cyan") +
  labs(title = "Boxplot of Age at First Marriage", y = "Age at First Marriage") +
  theme_minimal()
## Warning: Removed 2026 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Bar plot for 'used_internet'
ggplot(female_df, aes(x = used_internet)) +
  geom_bar(fill = "yellow") +
  theme_minimal() +
  ggtitle("Bar Plot of Used Internet") +
  xlab("Used Internet") +
  ylab("Count")
## Warning: Removed 2501 rows containing non-finite outside the scale range
## (`stat_count()`).

Handling Missing Data

# Before that, let's double check the count of missing values by column
count_missing <- sapply(female_df, function(x) sum(is.na(x)))
print(count_missing)
##        age_first_marriage                      area                    region 
##                      2026                        11                        11 
##            marital_status           education_level          health_insurance 
##                        18                       525                       525 
##                 ethnicity              wealth_index current_contraceptive_use 
##                      1148                         0                       885 
##        awareness_hiv_aids      used_computer_tablet             used_internet 
##                       541                       532                      2501 
##         owns_mobile_phone           access_to_media 
##                       529                       529
# Make a copy of female_df for imputation
imputed_df <- female_df
# Handle missing values in 'Age at First Marriage'

# Calculate the median value for 'Age at First Marriage', excluding NA values
median_age_first_marriage <- median(imputed_df$age_first_marriage, na.rm = TRUE)

# Impute missing values in 'Age at First Marriage' with the median value
imputed_df$age_first_marriage[is.na(imputed_df$age_first_marriage)] <- median_age_first_marriage
# Define a function to calculate mode for categorical variables
getMode <- function(v) {
  # The mode is the value that appears most frequently in the data
  uniqv <- unique(na.omit(v))  # Omit NA values and get unique values
  uniqv[which.max(tabulate(match(v, uniqv)))]  # Return the value with the highest frequency
}
# For 'ethnicity', an ordinal variable with predefined categories, it makes sense to impute missing values with the mode.
mode_ethnicity <- getMode(imputed_df$ethnicity)
imputed_df <- mutate(imputed_df, ethnicity = ifelse(is.na(ethnicity), mode_ethnicity, ethnicity))

# Binary variables like 'current_contraceptive_use', 'health_insurance','awareness_hiv_aids', 'used_internet', 'used_computer_tablet', 'owns_mobile_phone', and 'access_to_media'
# should be imputed with the mode since it represents the most frequent category (either 0 or 1).

# Calculate the mode for each binary variable
mode_used_internet <- getMode(imputed_df$used_internet)
mode_current_contraceptive_use <- getMode(imputed_df$current_contraceptive_use)
mode_health_insurance <- getMode(imputed_df$health_insurance)
mode_awareness_hiv_aids <- getMode(imputed_df$awareness_hiv_aids)
mode_used_computer_tablet <- getMode(imputed_df$used_computer_tablet)
mode_owns_mobile_phone <- getMode(imputed_df$owns_mobile_phone)
mode_access_to_media <- getMode(imputed_df$access_to_media)

# Impute missing values for binary variables
imputed_df <- mutate(imputed_df,
  used_internet = ifelse(is.na(used_internet), mode_used_internet, used_internet),
  current_contraceptive_use = ifelse(is.na(current_contraceptive_use), mode_current_contraceptive_use, current_contraceptive_use),
  health_insurance = ifelse(is.na(health_insurance), mode_health_insurance, health_insurance),
  awareness_hiv_aids = ifelse(is.na(awareness_hiv_aids), mode_awareness_hiv_aids, awareness_hiv_aids),
  used_computer_tablet = ifelse(is.na(used_computer_tablet), mode_used_computer_tablet, used_computer_tablet),
  owns_mobile_phone = ifelse(is.na(owns_mobile_phone), mode_owns_mobile_phone, owns_mobile_phone),
  access_to_media = ifelse(is.na(access_to_media), mode_access_to_media, access_to_media)
)

# 'education_level' is an ordinal variable where the median could be a more suitable measure of central tendency than the mode.
# However, given the categorical nature of the levels (e.g., "Primary", "Secondary"), using the mode may still be appropriate.
mode_education_level <- getMode(imputed_df$education_level)
imputed_df <- mutate(imputed_df, education_level = ifelse(is.na(education_level), mode_education_level, education_level))
# Check the resulting dataset to confirm changes
summary(imputed_df)
##  age_first_marriage      area           region      marital_status 
##  Min.   :10.00      Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:18.00      1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median :20.00      Median :2.000   Median :3.000   Median :1.000  
##  Mean   :20.76      Mean   :1.683   Mean   :3.404   Mean   :1.408  
##  3rd Qu.:23.00      3rd Qu.:2.000   3rd Qu.:5.000   3rd Qu.:1.000  
##  Max.   :47.00      Max.   :2.000   Max.   :6.000   Max.   :3.000  
##                     NA's   :11      NA's   :11      NA's   :18     
##  education_level health_insurance   ethnicity      wealth_index  
##  Min.   :0.000   Min.   :0.0000   Min.   :1.000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:1.0000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :2.000   Median :1.0000   Median :1.000   Median :2.000  
##  Mean   :2.438   Mean   :0.8721   Mean   :1.527   Mean   :2.494  
##  3rd Qu.:3.000   3rd Qu.:1.0000   3rd Qu.:2.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :1.0000   Max.   :4.000   Max.   :5.000  
##                                                                  
##  current_contraceptive_use awareness_hiv_aids used_computer_tablet
##  Min.   :0.0000            Min.   :0.0000     Min.   :0.0000      
##  1st Qu.:0.0000            1st Qu.:1.0000     1st Qu.:0.0000      
##  Median :1.0000            Median :1.0000     Median :0.0000      
##  Mean   :0.6168            Mean   :0.8072     Mean   :0.3251      
##  3rd Qu.:1.0000            3rd Qu.:1.0000     3rd Qu.:1.0000      
##  Max.   :1.0000            Max.   :1.0000     Max.   :1.0000      
##                                                                   
##  used_internet    owns_mobile_phone access_to_media 
##  Min.   :0.0000   Min.   :0.0000    Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:1.0000    1st Qu.:1.0000  
##  Median :1.0000   Median :1.0000    Median :1.0000  
##  Mean   :0.7192   Mean   :0.8886    Mean   :0.9071  
##  3rd Qu.:1.0000   3rd Qu.:1.0000    3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000    Max.   :1.0000  
## 
# After imputation, let's check for missing values
count_imputation <- sapply(imputed_df, function(x) sum(is.na(x)))
print(count_imputation)
##        age_first_marriage                      area                    region 
##                         0                        11                        11 
##            marital_status           education_level          health_insurance 
##                        18                         0                         0 
##                 ethnicity              wealth_index current_contraceptive_use 
##                         0                         0                         0 
##        awareness_hiv_aids      used_computer_tablet             used_internet 
##                         0                         0                         0 
##         owns_mobile_phone           access_to_media 
##                         0                         0

Visualization Comparing Before vs. After Imputation

# Histogram for 'age_first_marriage' before imputation
afm_0 <- ggplot(female_df, aes(x = age_first_marriage)) + 
  geom_histogram(fill = "lightpink", color = "white", bins = 30) +
  theme_light() +
  ggtitle("Age at First Marriage Before Imputation") +
  xlab("Age at First Marriage") +
  ylab("Frequency")

# Histogram for 'age_first_marriage' after imputation
afm_imputed <- ggplot(imputed_df, aes(x = age_first_marriage)) + 
  geom_histogram(fill = "plum", color = "white", bins = 30) +
  theme_light() +
  ggtitle("Age at First Marriage After Imputation") +
  xlab("Age at First Marriage") +
  ylab("Frequency")

# Arrange the two plots side by side
grid.arrange(afm_0, afm_imputed, ncol = 2)
## Warning: Removed 2026 rows containing non-finite outside the scale range
## (`stat_bin()`).

Creating Binary Variables for Child Marriage Under 18 and 15

# Convert "age at first marriage" into a binary variable to indicate child marriage
# Child marriage is defined as marriage before the age of 18
# The new binary variable "child_marriage" will have a value of 1 if the marriage occurred before age 18, and 0 otherwise
imputed_df <- imputed_df %>%
  mutate(child_marriage = ifelse(age_first_marriage < 18, 1, 0))
# Create a binary variable for child marriage under 15
# The new variable "child_marriage_u15" will have a value of 1 if the marriage occurred before age 15, and 0 otherwise
imputed_df <- imputed_df %>%
  mutate(child_marriage_u15 = ifelse(age_first_marriage < 15, 1, 0))
# Move "child_marriage" and "child_marriage_u15" to the front of the dataframe
imputed_df <- imputed_df %>%
  select(child_marriage, child_marriage_u15, everything())

Preparing for Regression Analysis

Correlation Matrix

# Calculate correlation matrix
cor_matrix <- cor(imputed_df %>% select_if(is.numeric), use = "complete.obs")

# Melt the correlation matrix
melted_cor_matrix <- melt(cor_matrix)
# Generate an interactive heatmap
corr_matrix <- ggplot(melted_cor_matrix, aes(Var1, Var2, fill = value)) +
    geom_tile() +
    scale_fill_gradientn(
        colours = c("deepskyblue", "white", "red2"),
        values = scales::rescale(c(-1, 0, 1)),
        limits = c(-1, 1),
        name="Pearson\nCorrelation"
    ) +
    theme_minimal() + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    xlab("") + 
    ylab("") +
    ggtitle("Correlation Matrix") 

# Convert ggplot object to plotly for interactivity
ggplotly(corr_matrix)

Accessing Multicollinearity

# Model with all predictors
model_for_vif <- lm(child_marriage ~ area + region + marital_status + education_level + health_insurance + ethnicity + wealth_index + current_contraceptive_use + awareness_hiv_aids + access_to_media, data = imputed_df)

# Calculate VIF (A VIF value > 5 indicates high multicollinearity)
vif_results <- vif(model_for_vif)
print(vif_results)
##                      area                    region            marital_status 
##                  1.276710                  1.111376                  1.556525 
##           education_level          health_insurance                 ethnicity 
##                  1.931734                  1.040051                  1.429007 
##              wealth_index current_contraceptive_use        awareness_hiv_aids 
##                  1.774619                  1.501796                  1.556100 
##           access_to_media 
##                  1.251097

Converting Categorical and Binary Variables to Factors

# Convert nominal and ordinal variables to factors
imputed_df$area <- as.factor(imputed_df$area)
imputed_df$region <- as.factor(imputed_df$region)
imputed_df$marital_status <- as.factor(imputed_df$marital_status)
imputed_df$education_level <- factor(imputed_df$education_level, ordered = TRUE)
imputed_df$ethnicity <- as.factor(imputed_df$ethnicity)
imputed_df$wealth_index <- factor(imputed_df$wealth_index, ordered = TRUE)

# Binary variables are already in the correct format and can be used as is

Baseline Logistic Regression Model

baseline_model <- glm(child_marriage ~ area + region + marital_status + education_level + health_insurance + ethnicity + wealth_index + current_contraceptive_use + awareness_hiv_aids + access_to_media, family = binomial(), data = imputed_df)

# Check the model summary to interpret the coefficients
summary(baseline_model)
## 
## Call:
## glm(formula = child_marriage ~ area + region + marital_status + 
##     education_level + health_insurance + ethnicity + wealth_index + 
##     current_contraceptive_use + awareness_hiv_aids + access_to_media, 
##     family = binomial(), data = imputed_df)
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                -2.69204    0.19574 -13.753  < 2e-16 ***
## area2                       0.23405    0.08675   2.698 0.006972 ** 
## region2                     0.31846    0.13180   2.416 0.015683 *  
## region3                     0.11824    0.13020   0.908 0.363836    
## region4                     0.28828    0.12882   2.238 0.025226 *  
## region5                    -0.01239    0.12758  -0.097 0.922641    
## region6                    -0.07003    0.13543  -0.517 0.605094    
## marital_status2             0.19873    0.12129   1.639 0.101311    
## marital_status3           -16.69086  132.38178  -0.126 0.899668    
## education_level.L          -2.63955    0.24416 -10.811  < 2e-16 ***
## education_level.Q          -0.79978    0.15213  -5.257 1.46e-07 ***
## education_level.C           0.73946    0.28177   2.624 0.008681 ** 
## education_level^4           0.81761    0.29403   2.781 0.005423 ** 
## education_level^5           0.25180    0.17168   1.467 0.142454    
## health_insurance            0.03453    0.08420   0.410 0.681703    
## ethnicity2                 -0.02966    0.10707  -0.277 0.781759    
## ethnicity3                  0.27077    0.13057   2.074 0.038108 *  
## ethnicity4                  1.21560    0.10944  11.107  < 2e-16 ***
## wealth_index.L             -0.43723    0.13081  -3.343 0.000830 ***
## wealth_index.Q             -0.18217    0.11943  -1.525 0.127181    
## wealth_index.C              0.27357    0.10035   2.726 0.006408 ** 
## wealth_index^4             -0.32427    0.08513  -3.809 0.000139 ***
## wealth_index^5             -0.10307    0.07948  -1.297 0.194712    
## current_contraceptive_use   0.09590    0.06954   1.379 0.167884    
## awareness_hiv_aids         -0.15285    0.07775  -1.966 0.049312 *  
## access_to_media            -0.06893    0.08757  -0.787 0.431210    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 10432.2  on 11275  degrees of freedom
## Residual deviance:  7737.2  on 11250  degrees of freedom
##   (18 observations deleted due to missingness)
## AIC: 7789.2
## 
## Number of Fisher Scoring iterations: 17